For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0     1     0     0     0    43     1     0     0     0    28     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ba 
##     0     1     0     0    27     0     0     2     0    31    45

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Ba)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3836 1.2233 1.0086 0.84957 0.60483 0.49903 0.44478
## Proportion of Variance 0.5719 0.1506 0.1024 0.07265 0.03682 0.02507 0.01991
## Cumulative Proportion  0.5719 0.7225 0.8249 0.89752 0.93434 0.95941 0.97932
##                           PC8     PC9    PC10
## Standard deviation     0.3538 0.22250 0.17534
## Proportion of Variance 0.0126 0.00498 0.00309
## Cumulative Proportion  0.9919 0.99691 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 7, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=7)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with sample types")
rect.dendrogram(dend, 7, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

LOI dataset:

loi[c(8:9,13:14)]
##                    context        type     c550       c950
## AY-1      Area D acropolis    mudbrick 2.088275  4.8891890
## AY-2      Area D acropolis    mudbrick 2.210488  1.9841768
## AY-3      Area D acropolis    mudbrick 1.360910  2.2488113
## AY-4      Area D acropolis    mudbrick 1.430588  1.8964316
## AY-5      Area D acropolis    mudbrick 1.338035  3.3256238
## AY-6      Area D acropolis mud plaster 1.480312  1.2721627
## AY-7      Area D acropolis    mudbrick 1.430335  3.1305245
## AY-8      Area D acropolis mud plaster 1.649521  1.9801623
## AY-9      Area D acropolis    mudbrick 2.368712  3.9045130
## AY-10     Area D acropolis    mudbrick 1.049666  3.1941032
## AY-11     Area D acropolis    mudbrick 1.082732  1.9786697
## AY-12     Area D acropolis         PEM 1.183502  0.7730823
## AY-13     Area D acropolis    mudbrick 1.389532  4.5525165
## AY-14     Area D acropolis    mudbrick 1.787980  2.7523295
## AY-15     Area D acropolis    mudbrick 2.233289  2.7459408
## AY-16     Area D acropolis    mudbrick 1.026364  0.9583722
## AY-17     Area D acropolis    mudbrick 1.569395  3.5356312
## AY-18     Area D acropolis    mudbrick 1.770410  2.6403398
## AY-19     Area D acropolis    mudbrick 1.454623  3.3395017
## AY-20     Area D acropolis mud plaster 1.593025  1.5216091
## AY-21     Area D acropolis    mudbrick 1.729351  1.7560317
## AY-22          Area D wall    mudbrick 1.280392  1.5344272
## AY-23          Area D wall    mudbrick 1.782016  1.9743301
## AY-24          Area D wall    mudbrick 1.697649  1.1877784
## AY-25          Area D wall    mudbrick 1.528936  2.7365019
## AY-26          Area D wall    mudbrick 1.802970  3.6061442
## AY-27          Area D wall mud plaster 1.885674  1.5169278
## AY-28          Area D wall    mudbrick 1.399589  0.7854046
## AY-29          Area D wall mud plaster 1.855100  2.0634749
## AY-30          Area D wall    mudbrick 2.552046  3.4633045
## AY-31          Area D wall    mudbrick 1.455348  3.4109094
## AY-32          Area D wall    mudbrick 1.244529  1.4577882
## AY-33          Area D wall    mudbrick 1.300813  0.8936149
## AY-34          Area D wall    mudbrick 1.581909  1.7044229
## AY-35          Area D wall    mudbrick 1.931867  2.6061513
## AY-36          Area D wall    mudbrick 2.422288  2.5205456
## AY-37          Area D wall    mudbrick 1.139581  1.0607829
## AY-38          Area D wall    mudbrick 2.130927  1.4162417
## AY-39          Area D wall  mud mortar 1.904037  2.5000000
## AY-40          Area D wall mud plaster 2.482920  2.5301253
## AY-41          Area D wall    mudbrick 2.565292  3.7553575
## AY-42   Hellenistic area A    mudbrick 1.593653  1.9123424
## AY-50      Builder's floor         PEM 1.012178  1.5457741
## AY-51      Builder's floor         PEM 1.212985  1.2668590
## AY-52      Builder's floor         PEM 1.763143  2.3906130
## AY-53      Builder's floor         PEM 1.402188  8.0417546
## AY-54      Builder's floor         PEM 1.843655  2.9683284
## AY-50_2    Builder's floor         PEM 1.189794  2.7660912
## AY-51_2    Builder's floor         PEM 1.847393  0.3981500
## AY-52_2    Builder's floor         PEM 1.323619  2.1166626
## AY-53_2    Builder's floor         PEM 1.336354 11.9604443
## AY-54_2    Builder's floor         PEM 2.009187  2.1482146

TGA dataset:

tga[c(8:9,10:11)]
##                      context        type     c550      c950
## AY-1        Area D acropolis    mudbrick 2.398013 2.4471417
## AY-2        Area D acropolis    mudbrick 1.736874 7.9947176
## AY-4        Area D acropolis    mudbrick 1.929141 1.4280310
## AY-3        Area D acropolis    mudbrick 1.233825 3.4734918
## AY-5        Area D acropolis    mudbrick 1.644479 3.5987261
## AY-7        Area D acropolis    mudbrick 1.505973 3.4082173
## AY-8        Area D acropolis mud plaster 1.558115 3.6606406
## AY-9        Area D acropolis    mudbrick 1.927070 3.2099602
## AY-10       Area D acropolis    mudbrick 1.176203 3.8700760
## AY-11       Area D acropolis    mudbrick 1.248990 1.3615058
## AY-12       Area D acropolis         PEM 1.274945 0.7860752
## AY-13       Area D acropolis    mudbrick 1.281796 3.7069429
## AY-14       Area D acropolis    mudbrick 1.971081 2.2591414
## AY-15       Area D acropolis    mudbrick 2.647777 2.4588519
## AY-16       Area D acropolis    mudbrick 1.329581 3.4621816
## AY-17       Area D acropolis    mudbrick 1.631206 4.5833762
## AY-18       Area D acropolis    mudbrick 1.698302 3.6382114
## AY-1 (2)    Area D acropolis    mudbrick 1.940815 2.4887321
## AY-19       Area D acropolis    mudbrick 1.874606 3.2830310
## AY-19 (2)   Area D acropolis    mudbrick 1.913375 1.6886646
## AY-20       Area D acropolis mud plaster 1.877608 2.0755290
## AY-21       Area D acropolis    mudbrick 1.413217 1.8372703
## AY-22            Area D wall    mudbrick 2.099634 2.0131512
## AY-23            Area D wall    mudbrick 1.749414 1.3528300
## AY-24            Area D wall    mudbrick 1.711251 1.4208525
## AY-25            Area D wall    mudbrick 1.938736 1.9671807
## AY-26            Area D wall    mudbrick 1.663242 3.9001094
## AY-27            Area D wall mud plaster 1.696577 1.6560255
## AY-28            Area D wall    mudbrick 1.259601 0.7233976
## AY-29            Area D wall mud plaster 1.724623 3.9961850
## AY-30            Area D wall    mudbrick 2.586865 3.2631063
## AY-31            Area D wall    mudbrick 1.343570 3.0447471
## AY-32            Area D wall    mudbrick 1.336629 1.4660852
## AY-33            Area D wall    mudbrick 1.268116 0.8460754
## AY-34            Area D wall    mudbrick 1.692888 1.4506317
## AY-35            Area D wall    mudbrick 2.002379 2.3973296
## AY-6        Area D acropolis mud plaster 1.659626 1.2887389
## AY-36            Area D wall    mudbrick 2.500488 2.3943098
## AY-37            Area D wall    mudbrick 1.077605 0.7994378
## AY-38            Area D wall    mudbrick 2.164785 1.6976633
## AY-39            Area D wall  mud mortar 1.915888 2.8108623
## AY-40            Area D wall mud plaster 2.618731 2.4977211
## AY-42     Hellenistic area A    mudbrick 1.655597 1.5855926
## AY-50        Builder's floor         PEM 1.347992 1.5505378
## AY-51        Builder's floor         PEM 1.282281 1.6713598
## AY-52        Builder's floor         PEM 1.492264 2.2490706
## AY-42 (2) Hellenistic area A    mudbrick 1.736691 1.6450275
## AY-53        Builder's floor         PEM 1.276679 5.4590326
## AY-53 (2)    Builder's floor         PEM 1.449142 4.6812471
## AY-54        Builder's floor         PEM 1.651196 3.0674290

Manual LOI results

# Colored boxplots in same graph
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form

ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by temperature",
        x ="Temperature", y = "LOI")

ggplot(loi_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by context",
        x ="Context", y = "LOI")

# Removing duplicates from the next graph
loi <- subset(loi[1:47, ])

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Colored boxplots in same graph
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form

ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by temperature",
        x ="Temperature", y = "LOI")

ggplot(tga_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by context",
        x ="Context", y = "LOI")

# Color by context biplot
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti    Mn    Fe 
##     0     0     1     0     0     0     1     0     2     0     0     0     0 
##    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Pb 
##     0     0     0     1     1     0     2     0     3

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Pb) 

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6      PC7
## Standard deviation     2.3569 1.4536 0.9800 0.72263 0.38973 0.17504 1.58e-16
## Proportion of Variance 0.5952 0.2264 0.1029 0.05595 0.01627 0.00328 0.00e+00
## Cumulative Proportion  0.5952 0.8216 0.9245 0.98044 0.99672 1.00000 1.00e+00
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Byzantine elements")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Byzantine grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=5)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with sample types")
rect.dendrogram(dend2, 5, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

LOI dataset:

loi[c(8:9,13:14)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 2.8231692 13.606389
## AY-44 Byzantine       floor 3.1549254  4.143032
## AY-45 Byzantine PEM (burnt) 9.7500231  6.336877
## AY-46 Byzantine         PEM 3.0153992  1.087679
## AY-47 Byzantine     ceiling 2.2356517  6.463715
## AY-48 Byzantine     ceiling 1.8207856 15.544905
## AY-49 Byzantine         PEM 0.4835757 32.404290

TGA dataset:

tga[c(8:9,10:11)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 3.2957418 12.043832
## AY-44 Byzantine       floor 4.3270582  8.653940
## AY-45 Byzantine PEM (burnt) 6.2941554  9.781651
## AY-46 Byzantine         PEM 3.8585514  6.813924
## AY-47 Byzantine     ceiling 2.0075863  7.902190
## AY-48 Byzantine     ceiling 1.8409714 11.731844
## AY-49 Byzantine         PEM 0.3230349 29.724115

Manual LOI results

# Colored boxplots in same graph
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form

ggplot(loi_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by context",
        x ="Context", y = "LOI")

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Colored boxplots in same graph
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form

ggplot(tga_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by context",
        x ="Context", y = "LOI")

# Color by type biplot   
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti    Mn    Fe 
##     0     0     0     0     1     0     3     0     8     0     0     0     0 
##    Ni    Cu    Zn    Rb    Sr     Y    Zr    Nb 
##    11     0     0     6     0    16     0    17

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-Nb) 

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)

pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
##  [1] "AH-4"  "AH-5"  "RLZ-1" "TI-1"  "TI-10" "TI-2"  "TI-3"  "TI-4"  "TI-5" 
## [10] "TI-6"  "TI-7"  "TI-8"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0850 1.3922 1.0863 0.70334 0.64347 0.44563 0.31268
## Proportion of Variance 0.4953 0.2208 0.1344 0.05636 0.04717 0.02262 0.01114
## Cumulative Proportion  0.4953 0.7161 0.8505 0.90687 0.95404 0.97666 0.98780
##                            PC8     PC9    PC10
## Standard deviation     0.24332 0.18520 0.11656
## Proportion of Variance 0.00674 0.00391 0.00155
## Cumulative Proportion  0.99454 0.99845 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_2 <- prcomp(pxrf_MB[3:12])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3606 1.6226 0.7143 0.52086 0.42466 0.34190 0.21405
## Proportion of Variance 0.5941 0.2807 0.0544 0.02892 0.01923 0.01246 0.00488
## Cumulative Proportion  0.5941 0.8748 0.9292 0.95809 0.97732 0.98978 0.99467
##                            PC8     PC9    PC10
## Standard deviation     0.18778 0.10943 0.05281
## Proportion of Variance 0.00376 0.00128 0.00030
## Cumulative Proportion  0.99843 0.99970 1.00000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_2, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

PCA(pxrf_MB[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=4)

area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 4, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

LOI dataset:

loi[c(8:9,13:14)]
##              context                  type     c550      c950
## AH-1        Ad Halom   lime plaster (fine) 3.636174 29.250009
## AH-2        Ad Halom                 floor 1.946490  9.981413
## AH-3        Ad Halom lime plaster (coarse) 4.192486 19.321298
## AH-4        Ad Halom              mudbrick 2.542779  4.414064
## AH-5        Ad Halom              mudbrick 1.471627  7.026742
## AH-6        Ad Halom         floor plaster 1.774556 40.504665
## AH-7        Ad Halom lime plaster (coarse) 4.941322 18.079922
## AH-8        Ad Halom                 floor 2.262738 15.868588
## AH-9        Ad Halom   lime plaster (fine) 1.674261 33.459065
## TI-1    Tell Iztabba              mudbrick 2.056555 42.038495
## TI-2    Tell Iztabba              mudbrick 1.847381 42.608634
## TI-3    Tell Iztabba              mudbrick 3.595561 29.149144
## TI-4    Tell Iztabba              mudbrick 4.892311 29.255047
## TI-5    Tell Iztabba              mudbrick 3.460312 27.795385
## TI-6    Tell Iztabba              mudbrick 3.169395 33.345746
## TI-7    Tell Iztabba              mudbrick 2.381649 40.992167
## TI-8    Tell Iztabba              mudbrick 3.088721 26.134740
## TI-10   Tell Iztabba              mudbrick 2.061447 41.299332
## RLZ-1 Rishon Le'Zion              mudbrick 2.364044  3.727290

TGA dataset:

tga[c(8:9,10:11)]
##              context     type     c550      c950
## AH-4        Ad Halom mudbrick 2.789774  4.486895
## AH-5        Ad Halom mudbrick 1.106870  4.457738
## RLZ-1 Rishon Le'Zion mudbrick 2.604865  4.169125
## TI-1    Tell Iztabba mudbrick 1.893905 42.031107
## TI-2    Tell Iztabba mudbrick 1.707268 42.691722
## TI-3    Tell Iztabba mudbrick 3.102582 30.945771
## TI-4    Tell Iztabba mudbrick 4.991016 30.111368
## TI-5    Tell Iztabba mudbrick 3.279413 28.194114
## TI-6    Tell Iztabba mudbrick 2.922239 34.000000
## TI-7    Tell Iztabba mudbrick 2.051722 41.455573
## TI-8    Tell Iztabba mudbrick 2.931780 29.661181
## TI-10   Tell Iztabba mudbrick 2.014504 41.949927

Manual LOI results

NOTE! Remember to remove Ah-Halom floor and plaster samples from boxplots!

# Colored boxplots in same graph
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form

ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by temperature",
        x ="Temperature", y = "LOI")

ggplot(loi_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by context",
        x ="Context", y = "LOI")

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type      
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(context), colour = factor(type))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Colored boxplots in same graph
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form

ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by temperature",
        x ="Temperature", y = "LOI")

ggplot(tga_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by context",
        x ="Context", y = "LOI")

# Color by context biplot
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Mn 
##     0     0     0     0     0     1     1     0     2     0     0     7     0 
##    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Rh    Ag 
##     0     0     0     0    14     1     0     2     0    17    15

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Rh) %>% 
        select(-Ag)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no soil)

pxrf_MB <- pxrf_final[c(1:11), ]
rownames(pxrf_MB)
##  [1] "PP-10" "PP-11" "PP-12" "PP-13" "PP-9"  "PP-19" "PP-14" "PP-15" "PP-16"
## [10] "PP-17" "PP-18"

pXRF: K means cluster analysis

Only mudbrick samples

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:12], 3), data = pxrf_MB, label = TRUE, label.size = 3)

pXRF: PCA

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_MB[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4929 1.3379 0.93597 0.84675 0.40245 0.34084 0.22919
## Proportion of Variance 0.6222 0.1792 0.08771 0.07178 0.01622 0.01163 0.00526
## Cumulative Proportion  0.6222 0.8014 0.88911 0.96089 0.97711 0.98874 0.99400
##                            PC8    PC9    PC10
## Standard deviation     0.21723 0.1096 0.02751
## Proportion of Variance 0.00472 0.0012 0.00008
## Cumulative Proportion  0.99872 0.9999 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

PCA(pxrf_MB[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with soil samples:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_2 <- prcomp(pxrf_final[3:12])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.1869 1.4150 1.1695 0.89935 0.64284 0.45968 0.33902
## Proportion of Variance 0.4864 0.2036 0.1391 0.08225 0.04202 0.02149 0.01169
## Cumulative Proportion  0.4864 0.6900 0.8291 0.91133 0.95335 0.97484 0.98653
##                            PC8     PC9    PC10
## Standard deviation     0.27239 0.21974 0.10001
## Proportion of Variance 0.00755 0.00491 0.00102
## Cumulative Proportion  0.99407 0.99898 1.00000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Palaepaphos elements")

autoplot(pca_2, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_2, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

HCA including only MB samples, colored by area:

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=6)

area <- as.factor(pxrf_MB[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 4, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)

HCA with soil samples, colored by type:

# HCA dendrogram, samples color coded by type:
dend2 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=6)

type <- as.factor(pxrf_final[, 2])
n_type <- length(unique(type))
cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
col_type <- cols_t[type]
labels_colors(dend2) <- col_type[order.dendrogram(dend2)]

plot(dend2, main="HCA with new samples by type")
rect.dendrogram(dend2, 6, border = "Black")
legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Only MB samples
grain_MB2 <- grain[c(1:11),]
grain_long3 <- melt(grain_MB2[c(1,14:113)], id = "Sample") 

ggplot(grain_long3, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves - MB only")

Barplots for 5 size fractions:

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions")

# Stacked barplot
grain_long4 <- melt(grain_MB2[c(1,9:13)], id = "Sample") 

ggplot(grain_long4, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions - MB only")

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Trad LOI: Creating MB only subset
loi_MB <- subset(loi[9:19, ])
loi_MB_long <- loi_MB[c(8,13:14)] 
loi_MB_long <- melt(loi_MB_long, id = "context") # Reshaping data to long form

# TGA: Creating MB only subset (with no duplicates)
tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ])

# TGA LOI: Creating MB only subset (duplicates included)
tga_MB1 <- subset(tga[9:22, ])
tga_MB_long <- tga_MB1[c(8,10:11)] 
tga_MB_long <- melt(tga_MB_long, id = "context") # Reshaping data to long form

LOI dataset:

loi[c(8:9,13:14)]
##                 context         type     c550     c950
## PP-1   Hadjuabudllah l1 soil layer 1 5.986894 19.67880
## PP-2   Hadjuabudllah l2 soil layer 2 4.466405 17.59870
## PP-3      Laona soil l1 soil layer 1 5.734589 21.55943
## PP-4      Laona soil l2 soil layer 2 4.323061 17.76702
## PP-5      Laona soil l3 soil layer 3 6.877261 19.61697
## PP-6      Laona soil l1 soil layer 1 5.038672 17.28507
## PP-7      Laona soil l2 soil layer 2 5.582797 17.71488
## PP-8      Laona soil l3 soil layer 3 5.134295 19.63221
## PP-9             LA54:4     mudbrick 7.451505 18.35791
## PP-10            LA54:4     mudbrick 3.557623 14.55842
## PP-11            LA54:4     mudbrick 4.002255 21.24757
## PP-12            LA54:4     mudbrick 2.797378 22.23702
## PP-13            LA54:4     mudbrick 3.156025 21.81049
## PP-14            LA59:2     mudbrick 2.818483 22.65698
## PP-15            LA59:2     mudbrick 3.822946 25.15246
## PP-16            LA59:2     mudbrick 2.365256 21.25664
## PP-17            LA59:2     mudbrick 3.860836 20.58203
## PP-18            LA59:2     mudbrick 2.451871 33.21076
## PP-19            LA54:7     mudbrick 1.739797 33.78112
## PP-1_2 Hadjuabudllah l1 soil layer 1 6.137732 21.18259
## PP-2_2 Hadjuabudllah l2 soil layer 2 4.429557 18.22193
## PP-3_2    Laona soil l1 soil layer 1 5.521160 22.61670
## PP-4_2    Laona soil l2 soil layer 2 4.582459 20.95611
## PP-5_2    Laona soil l3 soil layer 3 6.888629 20.42575
## PP-6_2    Laona soil l1 soil layer 1 5.348806 16.70897
## PP-7_2    Laona soil l2 soil layer 2 5.537857 18.92266
## PP-8_2    Laona soil l3 soil layer 3 5.230428 19.99844

TGA dataset:

tga[c(8:9,10:11)]
##                    context         type     c550     c950
## PP-1      Hadjuabudllah l1 soil layer 1 5.328983 22.05629
## PP-2      Hadjuabudllah l2 soil layer 2 3.798409 18.86880
## PP-3         Laona soil l1 soil layer 1 4.452588 21.57446
## PP-4         Laona soil l2 soil layer 2 4.537622 20.07908
## PP-5         Laona soil l3 soil layer 3 4.657919 23.10036
## PP-6         Laona soil l1 soil layer 1 4.892221 16.89988
## PP-7         Laona soil l2 soil layer 2 4.601116 18.47942
## PP-8         Laona soil l3 soil layer 3 4.525488 20.99294
## PP-9                LA54:4     mudbrick 5.392111 18.62861
## PP-10               LA54:4     mudbrick 2.360845 15.24474
## PP-11               LA54:4     mudbrick 2.773246 24.52314
## PP-11 (2)           LA54:4     mudbrick 3.164817 20.66002
## PP-12               LA54:4     mudbrick 2.734021 21.45383
## PP-13               LA54:4     mudbrick 3.235943 21.34319
## PP-14               LA59:2     mudbrick 3.202329 22.49060
## PP-15               LA59:2     mudbrick 3.508931 22.92802
## PP-16               LA59:2     mudbrick 2.215719 19.75916
## PP-17               LA59:2     mudbrick 3.921377 22.16794
## PP-18               LA59:2     mudbrick 3.234501 31.05334
## PP-19               LA54:7     mudbrick 2.204428 33.79251
## PP-10 (2)           LA54:4     mudbrick 2.226568 14.54839
## PP-9 (2)            LA54:4     mudbrick 5.258741 19.01388
## PP-8 (2)     Laona soil l3 soil layer 3 4.474886 22.50903
## PP-7 (2)     Laona soil l3 soil layer 2 4.739991 19.00483
## PP-6 (2)     Laona soil l1 soil layer 1 4.618851 18.27756
## PP-5 (2)     Laona soil l3 soil layer 3 5.122344 22.59504
## PP-4 (2)     Laona soil l2 soil layer 2 4.559860 20.33749
## PP-3 (2)     Laona soil l1 soil layer 1 4.302926 20.35644

Boxplots

# Colored boxplots: Trad LOI, MB only
ggplot(loi_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (MB only)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, MB only
ggplot(tga_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (MB only)",
        x ="Temperature", y = "LOI")

# Soil samples included: data preparation
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form

# Colored boxplots: Trad LOI, soil samples included
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (soil included)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, soil samples included
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (soil included)",
        x ="Temperature", y = "LOI")

Biplots

# MB only biplot
ggplot(loi_MB, 
      aes(c550, c950, label = rownames(loi_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Trad LOI: MB only, organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by context biplot
ggplot(tga_MB, 
      aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI: MB only, organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Trad LOI, soil included - color by context") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI, soil included - color by context") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type biplot: Trad LOI     
ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(colour = factor(type))) +
      labs(title = "Trad LOI, soil included - color by type") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Color by type biplot: TGA     
ggplot(tga, 
      aes(c550, c950)) +
      geom_point(size=2, aes(colour = factor(type))) +
      labs(title = "TGA LOI, soil included - color by type") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

Stacked barplots

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi_MB, key = Temp, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = Temp)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI (MB only)", 
         x = "", y = "LOI (%)")

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga_MB, key = Temp, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = Temp)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI (MB only)", 
         x = "", y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[4:93] <- pxrf[4:93] %>% mutate_if(is.character,as.numeric)

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$Group.1
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[3:47] <- scale(pxrf_scaled[3:47])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_averaged: All averaged data including averaged errors (Uine’s note: for later use)

colnames(pxrf_averaged)
##  [1] "Area"      "Type"      "MgO"       "MgO;Err"   "Al2O3"     "Al2O3;Err"
##  [7] "SiO2"      "SiO2;Err"  "P2O5"      "P2O5;Err"  "S"         "S;Err"    
## [13] "Cl"        "Cl;Err"    "K2O"       "K2O;Err"   "CaO"       "CaO;Err"  
## [19] "Ti"        "Ti;Err"    "V"         "V;Err"     "Cr"        "Cr;Err"   
## [25] "Mn"        "Mn;Err"    "Fe"        "Fe;Err"    "Co"        "Co;Err"   
## [31] "Ni"        "Ni;Err"    "Cu"        "Cu;Err"    "Zn"        "Zn;Err"   
## [37] "As"        "As;Err"    "Se"        "Se;Err"    "Rb"        "Rb;Err"   
## [43] "Sr"        "Sr;Err"    "Y"         "Y;Err"     "Zr"        "Zr;Err"   
## [49] "Nb"        "Nb;Err"    "Mo"        "Mo;Err"    "Rh"        "Rh;Err"   
## [55] "Pd"        "Pd;Err"    "Ag"        "Ag;Err"    "Cd"        "Cd;Err"   
## [61] "Sn"        "Sn;Err"    "Sb"        "Sb;Err"    "Ba"        "Ba;Err"   
## [67] "La"        "La;Err"    "Ce"        "Ce;Err"    "Hf"        "Hf;Err"   
## [73] "Ta"        "Ta;Err"    "W"         "W;Err"     "Pt"        "Pt;Err"   
## [79] "Au"        "Au;Err"    "Hg"        "Hg;Err"    "Tl"        "Tl;Err"   
## [85] "Pb"        "Pb;Err"    "Bi"        "Bi;Err"    "Th"        "Th;Err"   
## [91] "U"         "U;Err"

pxrf_all: All elements (the numbers below are the amount of NA values)

colSums(is.na(pxrf_all))
##  Area  Type   MgO Al2O3  SiO2  P2O5     S    Cl   K2O   CaO    Ti     V    Cr 
##     0     0     0     0     0     1     0     0     0     0     0     5     8 
##    Mn    Fe    Ni    Cu    Zn    As    Rb    Sr     Y    Zr    Nb    Ba 
##     0     0     0     0     0     1     0     0     0     0     4     3

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-V) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Cr)  %>% 
        select(-Ba)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final)
##  [1] "Area" "Type" "MgO"  "Ti"   "Mn"   "Fe"   "Cu"   "Zn"   "Rb"   "Sr"  
## [11] "Y"    "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:12], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[3:12])
##  [1] "MgO" "Ti"  "Mn"  "Fe"  "Cu"  "Zn"  "Rb"  "Sr"  "Y"   "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.6820 1.1972 0.84041 0.53728 0.45231 0.32524 0.2122
## Proportion of Variance 0.7193 0.1433 0.07063 0.02887 0.02046 0.01058 0.0045
## Cumulative Proportion  0.7193 0.8627 0.93328 0.96215 0.98261 0.99318 0.9977
##                            PC8     PC9 PC10
## Standard deviation     0.14003 0.05947    0
## Proportion of Variance 0.00196 0.00035    0
## Cumulative Proportion  0.99965 1.00000    1
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Artaxata grouped by area")

PCA(pxrf_final[3:12])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    color_branches(k=5)

area <- as.factor(pxrf_final[, 1])
n_area <- length(unique(area))
cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
col_area <- cols_a[area]
labels_colors(dend) <- col_area[order.dendrogram(dend)]

plot(dend, main="HCA with sample areas")
rect.dendrogram(dend, 5, border = "Black")
legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

LOI dataset:

loi[c(8:9,13:14)]
##               context     type     c550     c950
## AA-1          Hill II mudbrick 5.610986 5.756990
## AA-2          Hill II mudbrick 4.711074 7.965070
## AA-3          Hill II mudbrick 4.035368 7.841600
## AA-4    Urartian silo mudbrick 3.080904 7.738517
## AA-5          Phase I mudbrick 2.940399 6.257292
## AA-6          Phase I mudbrick 3.125140 5.507166
## AA-7  Phase II or III mudbrick 4.573474 6.267972
## AA-8  Phase II or III mudbrick 3.769098 7.238074
## AA-9         Phase II mudbrick 3.696483 6.966079
## AA-10        Phase II mudbrick 5.181930 5.580024

TGA dataset:

tga[c(8:9,10:11)]
##                  context     type     c550     c950
## AA-1             Hill II mudbrick 5.811508 5.611934
## AA-2             Hill II mudbrick 4.751337 8.599873
## AA-3             Hill II mudbrick 4.078462 7.997570
## AA-4       Urartian silo mudbrick 3.403442 7.086302
## AA-4 (2)   Urartian silo mudbrick 3.415174 7.167431
## AA-5             Phase I mudbrick 2.799218 6.532557
## AA-6             Phase I mudbrick 3.118409 4.097341
## AA-7     Phase II or III mudbrick 4.395924 6.082521
## AA-8     Phase II or III mudbrick 3.689788 6.722017
## AA-9            Phase II mudbrick 4.417015 6.467449
## AA-10           Phase II mudbrick 4.307988 6.154440

Manual LOI results

# Colored boxplots in same graph
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") # Reshaping data to long form

ggplot(loi_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by context",
        x ="Context", y = "LOI")

ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI by temperature",
        x ="Temperature", y = "LOI")

# Color by context
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Colored boxplots in same graph
tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") # Reshaping data to long form

ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by temperature",
        x ="Temperature", y = "LOI")

ggplot(tga_long, aes(x = context, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI by context",
        x ="Context", y = "LOI")

# Color by context
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)